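/*
	Purpose: Builds the files that pair each survey cohort with 
	         Census income data. The earlier survey cohorts 
	         (1910-1930) are appended to the 1940 Census data 
	         plus the 1936 survey data. Each later cohort decade 
	         19`d'0 (1940-1970) is appended to Census data from 
	         the following decade (1950-1980).
	
	Creates: 3b_3surveys_19`d'0survey, where `d' = {1,2,...,7}
*/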


clear all
set more off
set maxvar 10000


**--------------------------------------------------------------------------------**
**--------------------------------------------------------------------------------**

***************************
*** SURVEY DATA
***************************

	* Load the pooled analysis file and restrict it to the baseline sample
	use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
	drop if baseline_sample!=1

	* Source flags: every row at this point is a survey row, not a Census row
	generate surveys = 1
	generate census = 0

	* Give the survey weight the generic name used in the rest of this file
	rename wgt_sex_race weight

	* Retain only the variables needed downstream
	keep race south_merge fatheroccej sex fam_inc_real       ///
	     log_son_baseline rank_son_baseline age agesq dob    ///
	     weight survey_year decade surveys census

	* Store the cleaned surveys in a temporary file; later sections reload it
	tempfile surveys
	save `surveys'
		
**--------------------------------------------------------------------------------**
**--------------------------------------------------------------------------------**	

***************************
*** 1940 CENSUS + 1936 DATA 
***************************
	
* Build the 1940 Census extract of fathers ages 30-50
	use "$CensusData/input/Census1940_fathers_ages30to50_forIncomeScores.dta", clear

	generate census = 1

	* Harmonize the occupation variable name, then remove codes 81, 21 and 99
	rename occ1950ej fatheroccej
	drop if inlist(fatheroccej, 81, 21, 99)

	drop number_* occscore incwage region edu fam_income

	tempfile tempcens
	save `tempcens'

* Build the 1936 farmer and self-employment extract (codes 81 and 21 only)
	use "$Survey1936/output/ConsumptionSurvey_1936_foranalysis.dta", clear
	drop if tot_fam_inc==0

	rename occ1950ej fatheroccej
	keep if inlist(fatheroccej, 81, 21)
	generate survey1936 = 1

	keep tot_fam_inc fatheroccej race south_merge age agesq survey1936

	tempfile tempie
	save `tempie'

* Stack the pieces: cleaned surveys first, then the 1940 Census, then the 1936 rows
	use `surveys', clear
	append using `tempcens'
	append using `tempie'
	
**--------------------------------**	

* Make income variables

	* Census rows take hh_income; 1936 survey rows take tot_fam_inc
	clonevar hh_income_1936fix = hh_income if census==1
	replace hh_income_1936fix = tot_fam_inc if survey1936==1

	* Every Census or 1936 row must end up with a non-missing income
	assert hh_income_1936fix!=. if (survey1936==1 | census==1)
	label variable hh_income_1936fix "Household income, 1936 fix"

	* Natural log of the combined income measure
	generate log_father_hh_income = ln(hh_income_1936fix)
	label variable log_father_hh_income "Log household income, Census with 1936 fix"


**--------------------------------**

* Other helpful variables

	* String marker for the origin of each row; later replaces take precedence
	generate data_source = ""
	replace data_source = "surveys" if surveys==1
	replace data_source = "census1940" if census==1
	replace data_source = "consump_survey1936" if survey1936==1
	tabulate data_source, missing
	label variable data_source "Source of obs (i.e., surveys, Census, or 1936 survey)"

	label variable surveys "Baseline sample surveys"
	label variable survey1936 "1936 Survey"

	* Census 1940 and 1936 survey rows get a weight of one
	replace weight = 1 if inlist(data_source, "census1940", "consump_survey1936")

**-----------------------------------------------**
/* EARLIER SURVEY COHORTS: Save 3 separate 
                           datasets with 
                           correct samples */
**-----------------------------------------------**

	foreach d in 1 2 3 {

	display "Year 19`d'0"

	* Work on a copy so the combined data are intact for the next decade
	preserve
		* Restrict sample: all 1936 and 1940 Census rows plus this decade's survey cohort
		keep if survey1936==1 | census==1 | (surveys==1 & decade==19`d'0)
		
		/* Treat 1936 obs (occ ==21 or 81) like they are Census obs for the 
		   purpose of making triplets. Only relevant for earlier survey
		   cohorts.
		 */
		replace census=1 if survey1936==1

		* Make triplets: one group id per occupation x race x south cell
		local var_list "fatheroccej race south_merge" 
		egen triplet = group(`var_list')

		* egen group() leaves triplet missing when any component is missing,
		* and bysort would pool all of those rows into a single pseudo-triplet.
		* Report how many there are; they are excluded by the keep below.
		count if missing(triplet)
		
		bysort census triplet: gen tag = _n==1 //tag first observation of triplet
		* total() is the current name of the egen function formerly called sum()
		bysort triplet: egen number_samples = total(tag) 
		tab number_samples 
		* Keep a triplet only if it is a real cell found in both Census and surveys
		keep if number_samples==2 & !missing(triplet)

		save "$Mydirectory1/3_Output/3b_3surveys_19`d'0survey", replace
	restore

	}

**--------------------------------------------------------------------------------**
**--------------------------------------------------------------------------------**	

*******************************
* PREPARE 1950-1980 CENSUSES
*******************************
	
* One trimmed extract per Census year, each stored in its own temporary file.
* The data currently in memory are set aside once and brought back at the end.
	tempfile cens1950 cens1960 cens1970 cens1980

	preserve

	* 1950: the weight is named slwt and log income is named log_father_inctot
		use "$CensusData/output/Census1950_fathers_ages30to50.dta", clear
		rename slwt weight
		keep weight race south_merge fatheroccej census log_father_inctot
		rename log_father_inctot log_father_hh_income
		save `cens1950'

	* 1960, 1970 and 1980: same steps for each year, with the weight named perwt
		foreach yr in 1960 1970 1980 {
			use "$CensusData/output/Census`yr'_fathers_ages30to50.dta", clear
			rename perwt weight
			keep weight race south_merge fatheroccej census log_father_hh_income
			save `cens`yr''
		}

	restore
		
**-----------------------------------------------**
/* LATER SURVEY COHORTS: Save 4 separate 
                         datasets with 
                         correct samples */
**-----------------------------------------------**
	
	foreach d in 4 5 6 7 {
	
		* Census decade paired with this survey cohort (the following decade)
		local d2 = `d'+1

		display "Year 19`d'0"
		
		* Survey cohort for this decade
		use `surveys', clear
		keep if decade==19`d'0
		
		* Add the prepared Census extract from the following decade
		append using `cens19`d2'0'
		tab census
	
	* Make triplets: one group id per occupation x race x south cell
		local var_list "fatheroccej race south_merge" 
		egen triplet = group(`var_list')

		* egen group() leaves triplet missing when any component is missing,
		* and bysort would pool all of those rows into a single pseudo-triplet.
		* Report how many there are; they are excluded by the keep below.
		count if missing(triplet)
		
		bysort census triplet: gen tag = _n==1 //tag first observation of triplet
		* total() is the current name of the egen function formerly called sum()
		bysort triplet: egen number_samples = total(tag) 
		tab number_samples 
		* Keep a triplet only if it is a real cell found in both Census and surveys
		keep if number_samples==2 & !missing(triplet)

		save "$Mydirectory1/3_Output/3b_3surveys_19`d'0survey.dta", replace
	
	}
	

	